In [None]:
# Reference

# https://www.analyticsvidhya.com/blog/2021/10/a-comprehensive-guide-on-market-basket-analysis/
# http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/


In [86]:
# Market Basket Analysis 


# You need to understand at least the first three metrics below

# 1. Support
# -> Fraction of transactions that contain item A (single item), or both A and B (itemset of two)
# formula
# support(a) = (number of transactions containing a) / (total number of transactions)
# range: [0,1]

# 2. Confidence
# how often item b appears in the transactions that contain item a
# in other words, the share of the transactions containing a that also contain b
# formula
# confidence(a -> b) = support(a -> b) / support(a)
# range: [0,1]

# 3. lift
# how much more often a and b are bought together than expected if they were independent
# (lift = 1 means independent, lift > 1 means buying a raises the chance of buying b)
# formula
# lift(a -> b) = confidence(a -> b) / support(b)
# range: [0,∞)


# 4. Leverage
#  computes the difference between the observed frequency of a and b appearing together
#  and the frequency that would be expected if a and b were independent
#  formula
# leverage(a -> b) = support(a -> b) - support(a) x support(b)
# range: [−1,1]

# 5. Conviction
# the dependency of the consequent on the antecedent
# formula
# conviction(a -> b) = (1 - support(b)) / (1 - confidence(a -> b))
# range: [0,∞]




# Import Modules

In [None]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules


import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Dataset Preparation

In [88]:
# Load the basket matrix from a CSV file in the working directory.
# NOTE(review): presumably one row per transaction and one column per product — confirm against the CSV.
df = pd.read_csv("Basket(Clean).csv")

In [89]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE SKULLS,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [94]:
# Convert all positive values to 1 and everything else to 0

def my_encode_units(x):
    """Binarize one basket cell: 1 if the quantity is positive, otherwise 0.

    Parameters
    ----------
    x : number
        Quantity of a product in a transaction.

    Returns
    -------
    int
        1 when ``x > 0``; 0 for zero, negative and NaN values.

    Notes
    -----
    A single comparison covers the whole number line, so fractional
    quantities (0 < x < 1) and NaN no longer fall through to ``None``.
    """
    # `x > 0` evaluates to False for NaN, so missing cells are encoded as 0.
    return 1 if x > 0 else 0

# Vectorised binarisation of the whole frame: True where the quantity is positive,
# False otherwise (NaN compares as False). This replaces the per-cell
# `applymap` call, which is deprecated in recent pandas releases, and yields the
# boolean one-hot frame that mlxtend's apriori recommends as input.
basket_df = df.gt(0)

In [95]:
# (number of rows, number of columns) of the encoded frame.
basket_df.shape

(457, 1695)

# Applying Market Basket Analysis

In [99]:
# 1. Specify the minimum support, confidence and lift wanted

# Passed to apriori() as min_support when the frequent itemsets are generated.
SUPPORT = 0.05
# NOTE(review): CONFIDENCE is not referenced by any other cell visible in this notebook,
# so the rules are currently not filtered by confidence — confirm whether that is intended.
CONFIDENCE = 0.5
# Passed to association_rules() as min_threshold for the "lift" metric.
LIFT = 1

In [103]:
# 2. Generate the frequent itemsets with the apriori algorithm,
#    ordered from the highest support to the lowest

frequent_items = (
    apriori(basket_df, min_support=SUPPORT, use_colnames=True)
    .sort_values(by="support", ascending=False)
)

frequent_items.head()

Unnamed: 0,support,itemsets
25,0.818381,(POSTAGE)
36,0.245077,(ROUND SNACK BOXES SET OF4 WOODLAND)
84,0.225383,"(ROUND SNACK BOXES SET OF4 WOODLAND, POSTAGE)"
35,0.157549,(ROUND SNACK BOXES SET OF 4 FRUITS)
83,0.150985,"(ROUND SNACK BOXES SET OF 4 FRUITS, POSTAGE)"


In [107]:
# 3. Generate every rule whose lift reaches the LIFT threshold, highest lift first

item_rules = (
    association_rules(frequent_items, metric="lift", min_threshold=LIFT)
    .sort_values(by="lift", ascending=False)
)

In [108]:
# Inspect the size of the rule table: (number of rules, number of columns)
item_rules.shape

# 120 rules were generated (see the output of this cell)

(120, 9)

In [111]:
# View the top rules: head() shows only the first 5 rows, and item_rules is
# already sorted by lift in descending order
item_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
92,"(POSTAGE, WOODLAND CHARLOTTE BAG)",(RED RETROSPOT CHARLOTTE BAG),0.115974,0.070022,0.054705,0.471698,6.736439,0.046584,1.760316
93,(RED RETROSPOT CHARLOTTE BAG),"(POSTAGE, WOODLAND CHARLOTTE BAG)",0.070022,0.115974,0.054705,0.78125,6.736439,0.046584,4.041263
65,(WOODLAND CHARLOTTE BAG),(RED RETROSPOT CHARLOTTE BAG),0.126915,0.070022,0.059081,0.465517,6.648168,0.050194,1.739959
64,(RED RETROSPOT CHARLOTTE BAG),(WOODLAND CHARLOTTE BAG),0.070022,0.126915,0.059081,0.84375,6.648168,0.050194,5.587746
95,(WOODLAND CHARLOTTE BAG),"(RED RETROSPOT CHARLOTTE BAG, POSTAGE)",0.126915,0.065646,0.054705,0.431034,6.566092,0.046373,1.642199


# Implementation

In [167]:
# Make a copy of the association rules so that the column conversions below
# do not modify item_rules itself

suggestion_df = item_rules.copy()

In [168]:
# Replace the frozensets stored in both rule columns with ordinary sets
# (frozenset is the type produced by association_rules)

suggestion_df['antecedents'] = suggestion_df['antecedents'].map(set)
suggestion_df['consequents'] = suggestion_df['consequents'].map(set)


suggestion_df.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
92,"{POSTAGE, WOODLAND CHARLOTTE BAG}",{RED RETROSPOT CHARLOTTE BAG},0.115974,0.070022,0.054705,0.471698,6.736439,0.046584,1.760316
93,{RED RETROSPOT CHARLOTTE BAG},"{POSTAGE, WOODLAND CHARLOTTE BAG}",0.070022,0.115974,0.054705,0.78125,6.736439,0.046584,4.041263
65,{WOODLAND CHARLOTTE BAG},{RED RETROSPOT CHARLOTTE BAG},0.126915,0.070022,0.059081,0.465517,6.648168,0.050194,1.739959
64,{RED RETROSPOT CHARLOTTE BAG},{WOODLAND CHARLOTTE BAG},0.070022,0.126915,0.059081,0.84375,6.648168,0.050194,5.587746
95,{WOODLAND CHARLOTTE BAG},"{RED RETROSPOT CHARLOTTE BAG, POSTAGE}",0.126915,0.065646,0.054705,0.431034,6.566092,0.046373,1.642199


In [169]:
# Look at the antecedents of the first (highest-lift) rule to confirm they are now a plain set.
suggestion_df['antecedents'].iloc[0]

{'POSTAGE', 'WOODLAND CHARLOTTE BAG'}

In [None]:
# NOTE(review): this cell was never executed, and no other visible cell creates or uses a
# 'channel' column (the df preview shows only product-name columns, truncated). It looks like
# leftover scratch that would raise KeyError on Restart & Run All — confirm and delete if so.
df.loc[df['channel'].isin(['sale','fullprice'])]

In [173]:
# Basket to look up: keep only the rules whose antecedents are exactly this set
search = {'POSTAGE', 'WOODLAND CHARLOTTE BAG'}

suggestion_df[suggestion_df["antecedents"] == search]



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
92,"{POSTAGE, WOODLAND CHARLOTTE BAG}",{RED RETROSPOT CHARLOTTE BAG},0.115974,0.070022,0.054705,0.471698,6.736439,0.046584,1.760316
68,"{POSTAGE, WOODLAND CHARLOTTE BAG}",{ROUND SNACK BOXES SET OF4 WOODLAND},0.115974,0.245077,0.059081,0.509434,2.078673,0.030659,1.538882


In [234]:
# Some implementation that can be made from the association rules


def get_item(item):
    """Print the recommendation for a basket, taken from the highest-lift matching rule.

    Looks up the rules in the module-level ``suggestion_df`` whose antecedents
    are exactly ``item``, picks the one with the largest lift and prints its
    consequents as a numbered list followed by the lift rounded to 4 decimals.

    Parameters
    ----------
    item : set, frozenset, other iterable of item names, or str
        The items already in the basket. A bare string is treated as a
        single item name rather than being split into characters.

    Returns
    -------
    None
        The result is printed. When no rule has exactly these antecedents, a
        short "no recommendation" message is printed instead of raising
        ``IndexError``.
    """
    # A bare string would otherwise be exploded into a set of characters.
    wanted = {item} if isinstance(item, str) else set(item)

    # Compare as plain sets so the lookup works whether the column holds
    # sets or the frozensets originally produced by association_rules.
    is_match = (
        suggestion_df["antecedents"]
        .map(lambda antecedent: set(antecedent) == wanted)
        .astype(bool)
    )
    matching_rules = suggestion_df.loc[is_match]

    if matching_rules.empty:
        print(f"No recommendation found for: {sorted(wanted, key=str)}")
        return

    # Keep only the rule with the highest lift value.
    best_rule = matching_rules.sort_values(by="lift", ascending=False).iloc[0]
    item_recommendation = best_rule['consequents']
    lift_value = best_rule['lift']

    # Print the recommended items; sorting makes the order deterministic,
    # because iteration order of a set of strings can vary between runs.
    print(" ---------------------")
    print("  Recommendation Item ")
    print(" ---------------------")
    for index, recommended in enumerate(sorted(item_recommendation, key=str), start=1):
        print(f"{index}. {recommended}")
    print("")

    # Print the lift value of the chosen rule.
    print(" ------------")
    print("  Lift Value ")
    print(" ------------")
    print(f"-> {round(lift_value,4)}")

In [236]:
# Try the lookup with a few example baskets

value_1 = {'POSTAGE', 'WOODLAND CHARLOTTE BAG'}
value_2 = {"POSTAGE"}
value_3 = {"RED RETROSPOT CHARLOTTE BAG"}


get_item(value_1)
print("\n")  # two blank lines between the reports
get_item(value_2)
print("\n")
get_item(value_3)

 ---------------------
  Recommendation Item 
 ---------------------
1. RED RETROSPOT CHARLOTTE BAG

 ------------
  Lift Value 
 ------------
-> 6.7364


 ---------------------
  Recommendation Item 
 ---------------------
1. PLASTERS IN TIN STRONGMAN

 ------------
  Lift Value 
 ------------
-> 1.1837


 ---------------------
  Recommendation Item 
 ---------------------
1. POSTAGE
2. WOODLAND CHARLOTTE BAG

 ------------
  Lift Value 
 ------------
-> 6.7364
