# Market Basket Optimisation

## Association Rule Learning - Apriori

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Reading Dataset

In [2]:
dataset = pd.read_csv("Market_Basket_Optimisation.csv",header=None)

In [4]:
dataset.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


### Preprocessing

#### Creating list of lists(transactions) + excluding 'nan'

In [5]:
transactions = []
for i in range(0,dataset.shape[0]):
    transactions.append([str(dataset.values[i,j]) for j in range(0,dataset.shape[1]) if str(dataset.values[i,j]) != 'nan'])

### Apriori Algorithm

!["Apriori"](./img/Apriori.png)

### Training the Apriori Algorithm on the dataset

#### Setting min_support according to business problem -- Assume item purchased 3 times per day
#### 3 * 7 = 21 times per week --> I want items having min_support > (3*7)/7500 = 0.003

In [14]:
from apyori import apriori
rules = apriori(transactions=transactions,min_support=0.003,min_confidence=0.2,min_left=3,min_length=2,max_length=2)

In [16]:
results = list(rules)

### Organize the results into a Pandas DataFrame

In [19]:
def inspect(results):
    lhs         = [tuple(result[2][0][0]) for result in results]
    rhs         = [tuple(result[2][0][1]) for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))
results_df = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [20]:
results_df.head(10)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,(),"(mineral water,)",0.238368,0.238368,1.0
1,"(almonds,)","(burgers,)",0.005199,0.254902,2.923577
2,"(almonds,)","(chocolate,)",0.005999,0.294118,1.795099
3,"(almonds,)","(eggs,)",0.006532,0.320261,1.782108
4,"(almonds,)","(french fries,)",0.004399,0.215686,1.261983
5,"(almonds,)","(green tea,)",0.005066,0.248366,1.879913
6,"(almonds,)","(milk,)",0.005199,0.254902,1.967098
7,"(almonds,)","(mineral water,)",0.007599,0.372549,1.562914
8,"(almonds,)","(spaghetti,)",0.005999,0.294118,1.689262
9,"(avocado,)","(chocolate,)",0.007066,0.212,1.293907


### Getting the top 10

In [26]:
results_df.nlargest(10,columns='Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
161,"(fromage blanc,)","(honey,)",0.003333,0.245098,5.164271
59,"(light cream,)","(chicken,)",0.004533,0.290598,4.843951
134,"(pasta,)","(escalope,)",0.005866,0.372881,4.700812
261,"(pasta,)","(shrimp,)",0.005066,0.322034,4.506672
258,"(whole wheat pasta,)","(olive oil,)",0.007999,0.271493,4.12241
192,"(tomato sauce,)","(ground beef,)",0.005333,0.377358,3.840659
133,"(mushroom cream sauce,)","(escalope,)",0.005733,0.300699,3.790833
183,"(herb & pepper,)","(ground beef,)",0.015998,0.32345,3.291994
205,"(light cream,)","(olive oil,)",0.0032,0.205128,3.11471
1,"(almonds,)","(burgers,)",0.005199,0.254902,2.923577


#### For example: formage blanc --> honey 
#### Observing both 'formage blanc with honey'(ordered together) in the dataset with percentage of 0.003 (Support)
#### There is 24% confidence that a person buys 'formage blanc' would also buy 'honey' (Confidence)
#### Lift -> interpreted as to the increase in the ratio of sale of 'honey' when 'formage blanc' is sold or How likely 'honey' is soled when 'formage blanc' is sold while controlling for how popular 'honey'.