In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Seven shopping baskets; each inner list holds the items of one transaction.
# A few baskets list the same item twice (e.g. 'champagne', 'salmon'); those
# repeats are kept exactly as recorded.
data = [
    ["almonds", "avocado", "energy drink", "tomato juice", "green tea", "salmon"],
    ["almonds", "red wine", "chocolate", "avocado", "french fries", "green tea", "salmon"],
    ["energy drink", "avocado", "tomato juice", "champagne", "chocolate", "green tea"],
    ["tomato juice", "spaghetti", "champagne", "red wine", "shrimp", "champagne"],
    ["tomato juice", "cooking oil", "tomato juice", "mineral water", "green tea"],
    ["avocado", "chocolate", "mineral water", "salmon", "cooking oil", "salmon"],
    ["chocolate", "green tea", "chocolate", "champagne", "avocado"],
]

In [None]:
from mlxtend.preprocessing import TransactionEncoder

# Fit the encoder on the baskets first, then encode the same baskets into
# one row of per-item flags each.
le = TransactionEncoder()
le.fit(data)
dataset = le.transform(data)

In [None]:
# Item labels held by the fitted encoder; displayed to inspect the column order.
le.columns_

['almonds',
 'avocado',
 'champagne',
 'chocolate',
 'cooking oil',
 'energy drink',
 'french fries',
 'green tea',
 'mineral water',
 'red wine',
 'salmon',
 'shrimp',
 'spaghetti',
 'tomato juice']

In [None]:
# Wrap the encoded array in a DataFrame with one column per item label.
product_data = pd.DataFrame(data=dataset, columns=le.columns_)

In [None]:
# Preview the first rows of the encoded transaction table.
product_data.head()

Unnamed: 0,almonds,avocado,champagne,chocolate,cooking oil,energy drink,french fries,green tea,mineral water,red wine,salmon,shrimp,spaghetti,tomato juice
0,True,True,False,False,False,True,False,True,False,False,True,False,False,True
1,True,True,False,True,False,False,True,True,False,True,True,False,False,False
2,False,True,True,True,False,True,False,True,False,False,False,False,False,True
3,False,False,True,False,False,False,False,False,False,True,False,True,True,True
4,False,False,False,False,True,False,False,True,True,False,False,False,False,True


In [None]:
# (rows, columns) of the encoded table.
product_data.shape

(7, 14)

In [None]:
# Remove the column limit so displayed DataFrames show every column.
pd.options.display.max_columns = None

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Frequent itemsets with min_support=0.1, capped at four items per itemset.
frequent_factors = apriori(
    product_data,
    min_support=0.1,
    use_colnames=True,
    max_len=4,
)

# Association rules built from those itemsets, thresholded on lift (min 1).
rules = association_rules(frequent_factors, metric='lift', min_threshold=1)

# Print the first 10 rules, ordered among themselves by descending confidence.
# NOTE(review): head(10) runs before the sort, so these are not the ten
# highest-confidence rules overall -- confirm that this is the intent.
print(
    rules
    .head(10)
    .sort_values(by='confidence', ascending=False)
)

      antecedents     consequents  antecedent support  consequent support  \
1       (almonds)       (avocado)            0.285714            0.714286   
4  (french fries)       (almonds)            0.142857            0.285714   
7       (almonds)     (green tea)            0.285714            0.714286   
2  (energy drink)       (almonds)            0.285714            0.285714   
3       (almonds)  (energy drink)            0.285714            0.285714   
5       (almonds)  (french fries)            0.285714            0.142857   
8      (red wine)       (almonds)            0.285714            0.285714   
9       (almonds)      (red wine)            0.285714            0.285714   
0       (avocado)       (almonds)            0.714286            0.285714   
6     (green tea)       (almonds)            0.714286            0.285714   

    support  confidence  lift  leverage  conviction  
1  0.285714         1.0  1.40  0.081633         inf  
4  0.142857         1.0  3.50  0.102041     

In [None]:
# Frequent itemsets with a minimum support of 0.5, labelled by item name.
apriori(product_data, min_support=0.5, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.714286,(avocado)
1,0.571429,(chocolate)
2,0.714286,(green tea)
3,0.571429,(tomato juice)
4,0.571429,"(chocolate, avocado)"
5,0.571429,"(green tea, avocado)"


In [None]:
# Frequent itemsets at min_support=0.3, plus a 'length' column holding the
# number of items in each itemset.
frequent_itemsets = apriori(product_data, use_colnames=True, min_support=0.3)
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.714286,(avocado),1
1,0.428571,(champagne),1
2,0.571429,(chocolate),1
3,0.714286,(green tea),1
4,0.428571,(salmon),1
5,0.571429,(tomato juice),1
6,0.571429,"(chocolate, avocado)",2
7,0.571429,"(green tea, avocado)",2
8,0.428571,"(avocado, salmon)",2
9,0.428571,"(green tea, chocolate)",2


In [None]:
# Keep only itemsets with two or more items and a support of at least 0.4.
frequent_itemsets[
    (frequent_itemsets['support'] >= 0.4)
    & (frequent_itemsets['length'] >= 2)
]

Unnamed: 0,support,itemsets,length
6,0.571429,"(chocolate, avocado)",2
7,0.571429,"(green tea, avocado)",2
8,0.428571,"(avocado, salmon)",2
9,0.428571,"(green tea, chocolate)",2
10,0.428571,"(green tea, tomato juice)",2
11,0.428571,"(green tea, chocolate, avocado)",3


In [None]:
# Look up the row whose itemset is exactly {green tea, chocolate}.
frequent_itemsets[frequent_itemsets['itemsets'] == {'green tea', 'chocolate'}]

Unnamed: 0,support,itemsets,length
9,0.428571,"(green tea, chocolate)",2


In [None]:
# Rules whose consequent is the single item 'green tea', ranked by confidence.
# The consequents are matched through their string form (presumably the repr
# of a frozenset -- confirm for the installed mlxtend version), which contains
# the text "{'green tea'}" only when 'green tea' is the sole consequent.
target = "{'green tea'}"

# regex=False makes this a literal substring match. By default str.contains
# treats the pattern as a regular expression, where the braces in `target`
# are pattern syntax and only happen to be read as literal characters.
result_greentea = (
    rules[rules['consequents'].astype(str).str.contains(target, regex=False, na=False)]
    .sort_values(by='confidence', ascending=False)
)
result_greentea.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
7,(almonds),(green tea),0.285714,0.714286,0.285714,1.0,1.4,0.081633,inf
867,"(french fries, almonds, salmon)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
1020,"(energy drink, chocolate, avocado)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
986,"(avocado, tomato juice, champagne)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
958,"(energy drink, avocado, champagne)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
935,"(chocolate, avocado, champagne)",(green tea),0.285714,0.714286,0.285714,1.0,1.4,0.081633,inf
908,"(salmon, almonds, tomato juice)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
895,"(red wine, almonds, salmon)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
853,"(red wine, french fries, almonds)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
1085,"(chocolate, avocado, red wine)",(green tea),0.142857,0.714286,0.142857,1.0,1.4,0.040816,inf
