In [17]:
import pandas as pd

# Read the supermarket CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='supermarket.csv')


In [18]:
# Opt into the future behavior
pd.set_option('future.no_silent_downcasting', True)

In [19]:
# Map the marker 't' to 1 and the marker '?' to 0 wherever they occur
marker_codes = {'t': 1, '?': 0}
df = df.replace(to_replace=marker_codes)

print(df)

     department1 department2 department3 department4 department5 department6  \
0              0           0           0           0           0           0   
1              1           0           0           0           0           0   
2              0           0           0           0           0           0   
3              1           0           0           0           0           0   
4              0           0           0           0           0           0   
...          ...         ...         ...         ...         ...         ...   
4622           0           0           0           0           0           0   
4623           0           0           0           1           0           0   
4624           0           0           0           0           0           0   
4625           0           0           0           0           0           0   
4626           1           0           0           0           0           0   

     department7 department8 department

In [20]:
# Preview the first five rows
df.head(n=5)


Unnamed: 0,department1,department2,department3,department4,department5,department6,department7,department8,department9,'grocery misc',...,department208,department209,department210,department211,department212,department213,department214,department215,department216,total
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,high
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,low
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,low
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,low
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,low


In [21]:
# The apriori algorithm has no use for the 'total' column, so remove it
df = df.drop(labels='total', axis='columns')


In [22]:
# Print a summary of the frame: index range, column count, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4627 entries, 0 to 4626
Columns: 216 entries, department1 to department216
dtypes: object(216)
memory usage: 7.6+ MB


In [23]:
# Convert the DataFrame to boolean type.
# astype(bool) turns every non-empty string and every NaN into True, so a cell
# that is not exactly 0 or 1 would silently become a positive item. Check the
# values first and fail loudly rather than feed corrupted data downstream.
# The check also passes on an already-boolean frame, so the cell can be re-run.
is_binary = (df == 0) | (df == 1)
if not is_binary.all().all():
    offending = is_binary.columns[~is_binary.all().to_numpy()].tolist()
    raise ValueError(f"Non-binary values found in columns: {offending}")
df = df.astype(bool)

#### Apriori Algorithm

In [24]:
from mlxtend.frequent_patterns import apriori

# Mine frequent itemsets with Apriori (minimum support 0.5), labelled by column name
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.5,
)

print(frequent_itemsets)


     support                        itemsets
0   0.719689              ('bread and cake')
1   0.604063                ('baking needs')
2   0.532310             (juice-sat-cord-ms)
3   0.563000                      (biscuits)
4   0.587206                ('frozen foods')
5   0.503566           ('party snack foods')
6   0.635185                    (milk-cream)
7   0.640156                         (fruit)
8   0.639939                    (vegetables)
9   0.505079  ('bread and cake', milk-cream)
10  0.502485       ('bread and cake', fruit)


In [26]:
from mlxtend.frequent_patterns import apriori, association_rules


In [27]:
# Derive association rules from the Apriori itemsets, keeping confidence >= 0.7
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.7,
    metric="confidence",
)

# A single print with a newline separator emits the same text as two prints
print("\nAssociation Rules:", rules, sep="\n")


Association Rules:
          antecedents         consequents  antecedent support  \
0  ('bread and cake')        (milk-cream)            0.719689   
1        (milk-cream)  ('bread and cake')            0.635185   
2             (fruit)  ('bread and cake')            0.640156   

   consequent support   support  confidence      lift  leverage  conviction  \
0            0.635185  0.505079    0.701802  1.104878  0.047944    1.223398   
1            0.719689  0.505079    0.795168  1.104878  0.047944    1.368496   
2            0.719689  0.502485    0.784943  1.090670  0.041773    1.303425   

   zhangs_metric  
0       0.338634  
1       0.260194  
2       0.231022  


#### FP Growth Algorithm

In [41]:
from mlxtend.frequent_patterns import fpgrowth

# Mine frequent itemsets with FP-Growth (minimum support 0.5), labelled by column name
frequent_itemsets_fp = fpgrowth(
    df,
    use_colnames=True,
    min_support=0.5,
)

print(frequent_itemsets_fp)


     support                        itemsets
0   0.719689              ('bread and cake')
1   0.640156                         (fruit)
2   0.639939                    (vegetables)
3   0.635185                    (milk-cream)
4   0.604063                ('baking needs')
5   0.587206                ('frozen foods')
6   0.563000                      (biscuits)
7   0.532310             (juice-sat-cord-ms)
8   0.503566           ('party snack foods')
9   0.502485       ('bread and cake', fruit)
10  0.505079  ('bread and cake', milk-cream)


In [43]:
# Derive association rules from the FP-Growth itemsets, keeping confidence >= 0.7
rules = association_rules(
    frequent_itemsets_fp,
    min_threshold=0.7,
    metric="confidence",
)

# A single print with a newline separator emits the same text as two prints
print("\nAssociation Rules:", rules, sep="\n")


Association Rules:
          antecedents         consequents  antecedent support  \
0             (fruit)  ('bread and cake')            0.640156   
1  ('bread and cake')        (milk-cream)            0.719689   
2        (milk-cream)  ('bread and cake')            0.635185   

   consequent support   support  confidence      lift  leverage  conviction  \
0            0.719689  0.502485    0.784943  1.090670  0.041773    1.303425   
1            0.635185  0.505079    0.701802  1.104878  0.047944    1.223398   
2            0.719689  0.505079    0.795168  1.104878  0.047944    1.368496   

   zhangs_metric  
0       0.231022  
1       0.338634  
2       0.260194  


### Interesting Rules with Support, Confidence, and Lift

1. **Rule 1: ('milk-cream') -> ('bread and cake')**
   - **Support**: The support value is 0.505, indicating that this association occurs in approximately 50.5% of transactions.
   - **Confidence**: The confidence is high at 0.795, meaning that when a basket contains a milk-cream item, there's a strong likelihood (79.5%) that it also contains bread and cake.
   - **Lift**: The lift value is 1.105, which means bread and cake appears 1.105 times as often in baskets containing milk-cream as it would if the two items were purchased independently (79.5% versus its overall support of 72.0%).
   - **Justification**: Confidence is high and lift is above 1, so this rule indicates a positive association between milk-cream and bread-cake purchases. Because bread and cake already appears in about 72% of all baskets, the uplift is modest, but the pattern still suggests a cross-category preference among customers, making it a useful insight for strategic product placement and promotional campaigns in supermarkets.

2. **Rule 2: ('bread and cake') -> ('milk-cream')**
   - **Support**: The support value is also 0.505, indicating a similar occurrence rate in transactions as Rule 1.
   - **Confidence**: The confidence is 0.702, suggesting that customers who buy bread and cake have a 70.2% likelihood of also purchasing a milk-cream item.
   - **Lift**: The lift value of 1.105 is identical to Rule 1 because lift is symmetric in antecedent and consequent: the presence of bread and cake raises the likelihood of a milk-cream purchase by the same factor (70.2% versus its overall support of 63.5%).
   - **Justification**: Despite a slightly lower confidence compared to Rule 1, this rule still highlights a significant association between bread-cake and milk-cream purchases. It underscores a consistent consumer behavior pattern that can inform product placement strategies and promotional bundling tactics.

3. **Rule 3: ('fruit') -> ('bread and cake')**
   - **Support**: The support value is 0.502, indicating that this association occurs in approximately 50.2% of transactions.
   - **Confidence**: The confidence is 0.785, indicating that customers who purchase fruits have a 78.5% likelihood of also purchasing bread and cake.
   - **Lift**: The lift value is 1.091, which suggests a moderate association where the likelihood of purchasing bread and cake when buying fruits is 1.091 times higher than if purchased independently.
   - **Justification**: This rule indicates a notable association between healthier food choices (fruits) and bakery items (bread-cake). It suggests opportunities for promoting balanced shopping habits and creating cross-category promotions that appeal to health-conscious consumers while boosting sales in bakery departments.

### Conclusion

These rules are selected based on their meaningful support, high confidence, and lift values, indicating strong associations and practical implications for supermarket operations. They provide actionable insights into consumer behavior that can guide marketing strategies, product assortments, and promotional activities to enhance customer satisfaction and drive sales.