## Importing the libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import re

import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 7

from apyori import apriori

# import warnings
# warnings.simplefilter(action='ignore', category=FutureWarning)

## Importing the dataset

In [2]:
# The CSV has no header row (header=None), so pandas numbers the columns 0..N-1.
# Each row is one basket; cells beyond the basket's last item are read as NaN.
df = pd.read_csv(
    'Market_Basket_Optimisation.csv',
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [3]:
# (number of rows, number of columns) of the loaded frame
df.shape

(7501, 20)

## Converting dataset into the form needed by the algorithm

In [4]:
# The algorithm needs a list of lists: one inner list of item names per basket.
# Baskets shorter than the widest row are padded with NaN by read_csv; those
# cells are skipped here, because str(NaN) would otherwise become a bogus
# 'nan' item that shows up in almost every transaction and pollutes the
# mined itemsets. The number of transactions (rows) is unchanged.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.values
]

In [5]:
# Spot-check the first converted transaction
transactions[0]

['shrimp',
 'almonds',
 'avocado',
 'vegetables mix',
 'green grapes',
 'whole weat flour',
 'yams',
 'cottage cheese',
 'energy drink',
 'tomato juice',
 'low fat yogurt',
 'green tea',
 'honey',
 'salad',
 'mineral water',
 'salmon',
 'antioxydant juice',
 'frozen smoothie',
 'spinach',
 'olive oil']

## Apriori

<pre>
Let X,Y be itemsets, X->Y an association rule and T a set of transactions of a given database.

Support
    It is an indication of how frequently the itemset appears in the dataset.
    supp(Y) = (No. of appearance of 'Y')/(Total no. of transactions)
    
Confidence
    It is an indication of how often the rule has been found to be true.
    conf(X->Y) = (No. of appearance of 'X & Y')/(No. of appearance of 'X')
    conf(X->Y) = supp(X&Y)/supp(X)
    
Lift
    It is the ratio of the observed support to that expected if X and Y were independent.
    lift(X->Y) = conf(X->Y)/supp(Y)
    lift = 1 => no rule can be drawn (independent)
    lift > 1 => positive effect
    lift < 1 => negative effect
    
Conviction
    It can be interpreted as the ratio of the expected frequency that X occurs without Y
    (i.e. the frequency that the rule makes an incorrect prediction) if X and Y were independent,
    divided by the observed frequency of incorrect predictions.
    conv(X->Y) = (1-supp(Y))/(1-conf(X->Y))
    
Rule Power Factor (RPF)
    It is an indication of how intensely a rule’s items are associated with each other in terms of positive relationship.
    It is the measure of importance.
    rpf(X->Y) = conf(X->Y)*supp(X&Y)
</pre>

In [6]:
# Mine association rules with apyori (imported in the first cell).
#   min_support    - keep itemsets present in at least 0.3% of all transactions
#   min_confidence - keep rules whose confidence is at least 20%
#   min_lift       - keep rules whose lift is at least 3
# NOTE: an earlier version of this call also passed min_length=2. apyori has no
# such keyword (it only understands max_length), so the argument was silently
# ignored. It is dropped here to avoid implying a filter that is not applied.
# Single-item "rules" have lift 1 and are already removed by min_lift.
rules = apriori(
    transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=3,
)

In [7]:
# Materialise the records produced by apriori into a list so they can be
# indexed and iterated more than once.
results = list(rules)

In [8]:
# Inspect the structure of a single record (items, support, ordered_statistics)
results[0]

RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])

In [9]:
# Empty table with one column per rule attribute; every column is object dtype.
dt = pd.DataFrame(
    columns=[
        'Item_Base',
        'Item_Add',
        'Support',
        'Confidence',
        'Lift',
    ],
    dtype=object,
)

In [10]:
# Flatten the apriori output into one table row per association rule.
#
# A RelationRecord describes one frequent itemset and may carry several
# OrderedStatistic entries (one per base -> add split that passed the
# thresholds). Reading only ordered_statistics[0] would silently drop the
# remaining rules, so every entry is emitted as its own row.
#
# The records are collected first and the frame is built in a single call:
# this avoids growing the frame cell by cell, and re-running the cell cannot
# leave stale rows behind. Support/Confidence/Lift come out as float columns.
rule_rows = [
    {
        'Item_Base': list(stat.items_base),
        'Item_Add': list(stat.items_add),
        'Support': record.support,  # support of the whole itemset (base + add)
        'Confidence': stat.confidence,
        'Lift': stat.lift,
    }
    for record in results
    for stat in record.ordered_statistics
]
dt = pd.DataFrame(
    rule_rows,
    columns=['Item_Base', 'Item_Add', 'Support', 'Confidence', 'Lift'],
)

In [11]:
# Display the table of extracted rules
dt

Unnamed: 0,Item_Base,Item_Add,Support,Confidence,Lift
0,[light cream],[chicken],0.00453273,0.290598,4.84395
1,[mushroom cream sauce],[escalope],0.00573257,0.300699,3.79083
2,[pasta],[escalope],0.00586588,0.372881,4.70081
3,[fromage blanc],[honey],0.00333289,0.245098,5.16427
4,[herb & pepper],[ground beef],0.0159979,0.32345,3.29199
5,[tomato sauce],[ground beef],0.00533262,0.377358,3.84066
6,[light cream],[olive oil],0.00319957,0.205128,3.11471
7,[whole wheat pasta],[olive oil],0.00799893,0.271493,4.12241
8,[pasta],[shrimp],0.00506599,0.322034,4.50667
9,"[spaghetti, avocado]",[milk],0.00333289,0.416667,3.21545
