In [1]:
import pandas as pd
from apyori import apriori
import numpy as np

In [2]:
data=pd.read_csv('./Data/store_data.csv',header=None)
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [3]:
data.shape

(7501, 20)

In [4]:
data.values

array([['shrimp', 'almonds', 'avocado', ..., 'frozen smoothie',
        'spinach', 'olive oil'],
       ['burgers', 'meatballs', 'eggs', ..., nan, nan, nan],
       ['chutney', nan, nan, ..., nan, nan, nan],
       ...,
       ['chicken', nan, nan, ..., nan, nan, nan],
       ['escalope', 'green tea', nan, ..., nan, nan, nan],
       ['eggs', 'frozen smoothie', 'yogurt cake', ..., nan, nan, nan]],
      dtype=object)

In [5]:
#### Converting dataFrame to nested list
data_list=[[x for x in y if str(x)!='nan'] for y in data.values]
print("length :",len(data_list))
print(data_list[0:2])

length : 7501
[['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil'], ['burgers', 'meatballs', 'eggs']]


# Note on apriori():
* The class apriori() takes first argument as transactions. The transcation variable must be an iterable object.
* The other aruguments are:
    * **min_Support**: The minimum support of relations to be taken as threshold
    * **min_confidence**: The minimum confidence threshold
    * **min_lift**: The minimum lift of relations
    * **max_length**: The maximum length of the relations

In [6]:
association_rules=apriori(data_list,min_support=0.0045,min_confidence=0.2,min_lift=3,max_length=3)

association_rules=list(association_rules)
print(len(association_rules))

23


#### Output of apriori()
* The apriori() results in RelationRecord. The RelationRecord reflects a subset of items, while ordered _statistics is a list of OrderedStatistics, which reflect the rules. Each OrderedStatistics, items base is the antecedent and the items add is the conseauent. The support is stored in the RelationRecord, since it's the same for the contained rules.
* Asample output would be -
```
RelationRecord(items=frozenset({'item1', 'item2'}), support=0.15365410803449842, ordered_statistics=OrderedStatistic(items_base=frozenset({'item1'}), itms_add=frozenset ({'item2'}), confidence=0.6203428891875382, lift=2.2233410344037092), OrderedStatistic(items_base=frozenset({'item2'}), items_add=frozenset({'item1'}), confidence=0.5507049891540131, lift=2.2233410344037e97) ])
```
* The meaning of this would be -
```
item1 -> item2 with @.62 confidence and 2.2233410344037092 lift
item2 -> item1 with @.55 confidence and 2.2233410344937097 lift
Both have Support=0. 15365410803449842.
```

In [7]:
print(association_rules[0])

RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])


In [8]:
print(association_rules[7])

RelationRecord(items=frozenset({'frozen vegetables', 'shrimp', 'chocolate'}), support=0.005332622317024397, ordered_statistics=[OrderedStatistic(items_base=frozenset({'frozen vegetables', 'chocolate'}), items_add=frozenset({'shrimp'}), confidence=0.23255813953488375, lift=3.2545123221103784), OrderedStatistic(items_base=frozenset({'shrimp', 'chocolate'}), items_add=frozenset({'frozen vegetables'}), confidence=0.29629629629629634, lift=3.1084175084175087)])


RelationRecord(
* items=frozenset({'chocolate', 'shrimp', 'frozen vegetables'}), 
* support=0.005332622317024397, 
* ordered_statistics = [ 
    * OrderedStatistic(
        <br>- items_base=frozenset({'chocolate', 'frozen vegetables'}),
        <br>- items_add=frozenset({'shrimp'}),
        <br>- confidence=0.23255813953488375, 
        <br>- lift=3.2545123221103784), 
    * OrderedStatistic(
        <br>- items_base=frozenset({'chocolate', 'shrimp'}),
        <br>- items_add=frozenset({'frozen vegetables'}), 
        <br>- confidence=0.29629629629629634,
        <br>- lift=3.1084175084175087)])

In [9]:
#### Converting all the rules to dataFrame
association_rules_list=[]
for i in range(len(association_rules)):
    for j in range(0,len(association_rules[i][2])):
        association_rules_list.append([association_rules[i][0],association_rules[i][2][j][1],
                                       association_rules[i][2][j][0],association_rules[i][1],
                                       association_rules[i][2][j][2],association_rules[i][2][j][3]])

final_df=pd.DataFrame(association_rules_list,columns=['Items','Antecedent','Consequent','Support','Confidence','Lift'])
print(final_df.shape)
final_df.head()

(27, 6)


Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
0,"(light cream, chicken)",(chicken),(light cream),0.004533,0.290598,4.843951
1,"(mushroom cream sauce, escalope)",(escalope),(mushroom cream sauce),0.005733,0.300699,3.790833
2,"(pasta, escalope)",(escalope),(pasta),0.005866,0.372881,4.700812
3,"(herb & pepper, ground beef)",(ground beef),(herb & pepper),0.015998,0.32345,3.291994
4,"(ground beef, tomato sauce)",(ground beef),(tomato sauce),0.005333,0.377358,3.840659


In [10]:
#### Another way convert all the rules to dataFrame
df=pd.DataFrame(columns=('Items','Antecedent','Consequent','Support','Confidence','Lift'))

Support=[]
Confidence=[]
Lift=[]
Items=[]
Antecedent=[]
Consequent=[]

for RelationRecord in association_rules:
    for ordered_stat in RelationRecord.ordered_statistics:
        Support.append(RelationRecord.support)
        Items.append(RelationRecord.items)
        Antecedent.append(ordered_stat.items_base)
        Consequent.append(ordered_stat.items_add)
        Confidence.append(ordered_stat.confidence)
        Lift.append(ordered_stat.lift)
df['Items']=list(map(set,Items))
df['Antecedent']=list(map(set,Antecedent))
df['Consequent']=list(map(set,Consequent))
df['Support']=Support
df['Confidence']=Confidence
df['Lift']=Lift
print(df.shape)
df.head()

(27, 6)


Unnamed: 0,Items,Antecedent,Consequent,Support,Confidence,Lift
0,"{light cream, chicken}",{light cream},{chicken},0.004533,0.290598,4.843951
1,"{mushroom cream sauce, escalope}",{mushroom cream sauce},{escalope},0.005733,0.300699,3.790833
2,"{pasta, escalope}",{pasta},{escalope},0.005866,0.372881,4.700812
3,"{herb & pepper, ground beef}",{herb & pepper},{ground beef},0.015998,0.32345,3.291994
4,"{ground beef, tomato sauce}",{tomato sauce},{ground beef},0.005333,0.377358,3.840659
