In [45]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import seaborn as sns 
import matplotlib.pyplot as plt

In [46]:
# Load the raw grocery purchase log and preview its first three rows.
df = pd.read_csv(filepath_or_buffer="Groceries_dataset.csv")
df.head(n=3)

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit


In [47]:
# Distinct product names found in the 'itemDescription' column, and how many
# of them there are.
unique_items = df["itemDescription"].unique()
num_unique_items = len(unique_items)

# Report the size of the product catalogue.
print(f'Number of unique products: {num_unique_items}')

Number of unique products: 167


In [48]:
# Tally how many rows mention each product; value_counts() orders the result
# from the most to the least frequent product.
product_counts = df["itemDescription"].value_counts()

# Emit the heading and the tally with a single call; sep="\n" places the
# Series on the line after the heading.
print("Product names and their counts:", product_counts, sep="\n")

Product names and their counts:
itemDescription
whole milk               2502
other vegetables         1898
rolls/buns               1716
soda                     1514
yogurt                   1334
                         ... 
rubbing alcohol             5
bags                        4
baby cosmetics              3
kitchen utensil             1
preservation products       1
Name: count, Length: 167, dtype: int64


In [49]:
# Apriori expects a list of transactions, each transaction being a list of
# item names. Build one list per Member_number containing every item that
# member has in df.
# NOTE(review): grouping only by Member_number pools all of a member's rows
# (across every Date) into a single basket — confirm that a per-member basket,
# rather than a per-visit one, is what this analysis intends.
transactions = [
    list(member_items)
    for _member, member_items in df.groupby("Member_number")["itemDescription"]
]

# Inspect a sample with transactions[:5] if needed.

In [50]:
# Learn the item vocabulary from the transactions and, in the same step,
# encode each transaction as one boolean row (one column per item).
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)

In [51]:
# Wrap the encoded matrix in a DataFrame whose columns carry the item names
# learned by the encoder, then preview the first three rows.
df_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)
df_encoded.head(n=3)

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
1,False,False,False,False,False,False,False,False,True,False,...,False,False,False,True,False,True,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


In [74]:
# Mine frequent itemsets with Apriori: keep every item combination whose
# support is at least 0.07, i.e. that occurs in at least 7% of the rows of
# df_encoded. use_colnames=True labels itemsets with item names instead of
# column indices.
frequent_itemsets = apriori(df_encoded, min_support=0.07, use_colnames=True)

# Sort ascending by support so the most common itemsets appear last.
frequent_itemsets = frequent_itemsets.sort_values(by="support")

# Show the frequent itemsets.
print(frequent_itemsets)

# How to read the "support" column: it is the fraction of rows of df_encoded
# that contain every item of the itemset. For example, a support of 0.070292
# means the itemset is present in roughly 7 out of every 100 rows.

     support                                itemsets
47  0.070292             (domestic eggs, whole milk)
69  0.071575               (root vegetables, yogurt)
49  0.071575              (other vegetables, pastry)
82  0.071832  (other vegetables, whole milk, yogurt)
45  0.071832              (rolls/buns, citrus fruit)
..       ...                                     ...
37  0.282966                                (yogurt)
32  0.313494                                  (soda)
28  0.349666                            (rolls/buns)
24  0.376603                      (other vegetables)
36  0.458184                            (whole milk)

[83 rows x 2 columns]


In [53]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.5.
# NOTE(review): this cell's execution count (53) is lower than that of the
# apriori cell above (74), and the saved output lists far more rules than the
# 83 itemsets printed above could produce — the output below presumably comes
# from an earlier run with a different frequent_itemsets; re-run the notebook
# top to bottom to refresh it.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)

# Show the resulting rules.
print(rules)

                                            antecedents         consequents  \
0                                            (UHT-milk)        (whole milk)   
1                                                (beef)        (whole milk)   
2                                        (bottled beer)        (whole milk)   
3                                       (bottled water)        (whole milk)   
4                                         (brown bread)        (whole milk)   
...                                                 ...                 ...   
1112  (rolls/buns, other vegetables, shopping bags, ...        (whole milk)   
1113    (rolls/buns, whole milk, shopping bags, yogurt)  (other vegetables)   
1114  (other vegetables, whole milk, shopping bags, ...        (rolls/buns)   
1115       (rolls/buns, other vegetables, soda, yogurt)        (whole milk)   
1116             (rolls/buns, whole milk, soda, yogurt)  (other vegetables)   

      antecedent support  consequent support   supp