# Market Basket Analysis

In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import association_rules

# Display settings: show up to 20 columns and allow 800 characters per line
# so wide tables print on one row instead of wrapping.
pd.options.display.max_columns = 20
pd.options.display.width = 800

In [2]:
# Toy transaction log: one inner list per purchase, holding the items bought.
# Note: 'Onion' is listed twice in the last purchase (kept as in the original data).
dataset = [
    ["Milk", "Water", "Wheat", "Kidney Beans", "Eggs", "Yogurt"],
    ["Dill", "Onion", "Wheat", "Kidney Beans", "Eggs", "Yogurt"],
    ["Milk", "Apple"],
    ["Milk", "Apple", "Wheat"],
    ["Milk", "Apple", "Kidney Beans", "Eggs", "Wheat"],
    ["Milk", "Apple", "Honey", "Eggs"],
    ["Milk", "Beer", "Corn", "Kidney Beans", "Yogurt"],
    ["Corn", "Onion", "Onion", "Kidney Beans", "Ice cream", "Eggs"],
]

In [3]:
# One-hot encode the purchases: TransactionEncoder turns the list of item lists
# into a boolean matrix with one row per purchase and one column per distinct item.
te = TransactionEncoder()

# Fit the encoder on the purchases, then encode those same purchases.
model = te.fit(dataset)
trans_model = model.transform(dataset)

# Wrap the boolean matrix in a DataFrame labelled with the fitted item names.
# (fit() returns the encoder itself, so `model.columns_` is `te.columns_`.)
df = pd.DataFrame(data=trans_model, columns=model.columns_)
df

Unnamed: 0,Apple,Beer,Corn,Dill,Eggs,Honey,Ice cream,Kidney Beans,Milk,Onion,Water,Wheat,Yogurt
0,False,False,False,False,True,False,False,True,True,False,True,True,True
1,False,False,False,True,True,False,False,True,False,True,False,True,True
2,True,False,False,False,False,False,False,False,True,False,False,False,False
3,True,False,False,False,False,False,False,False,True,False,False,True,False
4,True,False,False,False,True,False,False,True,True,False,False,True,False
5,True,False,False,False,True,True,False,False,True,False,False,False,False
6,False,True,True,False,False,False,False,True,True,False,False,False,True
7,False,False,True,False,True,False,True,True,False,True,False,False,False


In [7]:
# Support of an itemset = number of rows containing the itemset / total number of rows.
# Keep every itemset whose support is at least 0.2, labelled with item names.
freq_itemset = apriori(df, use_colnames=True, min_support=0.2)

# Show the five itemsets with the highest support.
top_by_support = freq_itemset.sort_values(by="support", ascending=False)
print(top_by_support.head(5))

   support        itemsets
4    0.750          (Milk)
2    0.625          (Eggs)
3    0.625  (Kidney Beans)
0    0.500         (Apple)
6    0.500         (Wheat)


In [6]:
# Each rule reads "antecedents (A) -> consequents (B)": for purchases containing
# itemset A, the metrics describe how often they also contain itemset B.
#
# antecedent support / consequent support:
#     share of all purchases containing A / containing B
# support:
#     share of all purchases containing both A and B (i.e. the itemset A union B)
# confidence = support(A union B) / antecedent support
#     share of the purchases containing A that also contain B
# lift = confidence / consequent support
#     lift > 1: the presence of A raises the probability that the purchase also
#     contains B; the larger the lift, the stronger the association between A and B

# Keep only rules whose lift is at least 2.
rules = association_rules(freq_itemset, metric="lift", min_threshold=2)

# Select the metrics to show by name rather than by position, so the printed
# table does not depend on the order in which the library emits its columns.
shown_columns = [
    "antecedents",
    "consequents",
    "antecedent support",
    "consequent support",
    "support",
    "confidence",
    "lift",
]
print(rules[shown_columns])

                     antecedents                   consequents  antecedent support  consequent support  support  confidence      lift
0           (Kidney Beans, Eggs)                       (Onion)               0.500               0.250     0.25    0.500000  2.000000
1                        (Onion)          (Kidney Beans, Eggs)               0.250               0.500     0.25    1.000000  2.000000
2                 (Eggs, Yogurt)                       (Wheat)               0.250               0.500     0.25    1.000000  2.000000
3                        (Wheat)                (Eggs, Yogurt)               0.500               0.250     0.25    0.500000  2.000000
4     (Kidney Beans, Eggs, Milk)                       (Wheat)               0.250               0.500     0.25    1.000000  2.000000
5                        (Wheat)    (Kidney Beans, Eggs, Milk)               0.500               0.250     0.25    0.500000  2.000000
6   (Kidney Beans, Eggs, Yogurt)                       (Wheat)

<b>Interpretation of the table above:</b>
* Kidney Beans and Eggs are bought together in 50% of all purchases. Onions are purchased 25% of the time.
* When someone purchases Kidney Beans and Eggs together, they are twice as likely as the average customer to purchase Onions as well (lift = 2).

<b>Knowing which products are bought together enables Cross-Selling:</b>
* Stores can place those items close to each other.
* Online stores can notify the customer about 'What other customers purchased'.