In [1]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd
import numpy as np

In [2]:
# Define the toy dataset
toy_dataset = [
    ['Skirt', 'Sneakers', 'Scarf', 'Pants', 'Hat'],
    ['Sunglasses', 'Skirt', 'Sneakers', 'Pants', 'Hat'],
    ['Dress', 'Sandals', 'Scarf', 'Pants', 'Heels'],
    ['Dress', 'Necklace', 'Earrings', 'Scarf', 'Hat', 'Heels', 'Hat'],
    ['Earrings', 'Skirt', 'Skirt', 'Scarf', 'Shirt', 'Pants']
]


In [3]:
# Convert dataset into a format suitable for the apriori algorithm
te = TransactionEncoder()
te_ary = te.fit(toy_dataset).transform(toy_dataset)
df_toy = pd.DataFrame(te_ary, columns=te.columns_)


In [4]:
# Apply the Apriori algorithm
frequent_itemsets_toy = apriori(df_toy, min_support=0.4, use_colnames=True)


In [5]:
# Generate association rules
rules_toy = association_rules(frequent_itemsets_toy, metric="lift", min_threshold=1.0)


In [6]:
# Alternative approach: Implementing Apriori manually using itertools

from itertools import combinations
from collections import defaultdict

In [7]:
# Function to get frequent itemsets
def get_frequent_itemsets(transactions, min_support):
    item_counts = defaultdict(int)
    num_transactions = len(transactions)

In [11]:
# Ensure transactions is defined as a list of lists (or a list of sets)
transactions = [
    ["milk", "bread", "butter"],
    ["bread", "butter"],
    ["milk", "bread"]
]

# Initialize item_counts as a dictionary
item_counts = {}

# Count individual items
for transaction in transactions:
    for item in set(transaction):
        item_counts[frozenset([item])] = item_counts.get(frozenset([item]), 0) + 1

print(item_counts)

{frozenset({'milk'}): 2, frozenset({'butter'}): 2, frozenset({'bread'}): 3}


In [12]:
# Function to generate association rules
def generate_rules(frequent_itemsets, min_confidence=0.6):
    rules = []
    for itemset in frequent_itemsets.keys():
        if len(itemset) > 1:
            for i in range(1, len(itemset)):
                for antecedent in combinations(itemset, i):
                    antecedent = frozenset(antecedent)
                    consequent = itemset - antecedent
                    confidence = frequent_itemsets[itemset] / frequent_itemsets[antecedent]

                    if confidence >= min_confidence:
                        rules.append((antecedent, consequent, confidence))
    return rules

In [13]:
# Get frequent itemsets with a minimum support of 0.4
frequent_itemsets_toy = get_frequent_itemsets(toy_dataset, min_support=0.4)

In [15]:
print(frequent_itemsets_toy)


None


In [22]:
def find_frequent_itemsets(transactions, min_support=0.5):
    from mlxtend.frequent_patterns import apriori
    from mlxtend.preprocessing import TransactionEncoder
    import pandas as pd

    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets


In [23]:
transactions = [['milk', 'bread'], ['milk', 'diaper'], ['bread', 'diaper', 'beer']]


In [24]:
!pip install mlxtend



In [26]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Sample transactions
transactions = [['milk', 'bread'], ['milk', 'diaper'], ['bread', 'diaper', 'beer']]

# Convert transactions to one-hot encoded DataFrame
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Now, apply apriori
from mlxtend.frequent_patterns import apriori
frequent_itemsets_toy = apriori(df, min_support=0.5, use_colnames=True)
print(frequent_itemsets_toy)


    support  itemsets
0  0.666667   (bread)
1  0.666667  (diaper)
2  0.666667    (milk)


In [3]:
from mlxtend.preprocessing import TransactionEncoder

In [4]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Define function to get frequent itemsets
def get_frequent_itemsets(dataset, min_support):
    # Convert transactions into a one-hot encoded DataFrame
    te = TransactionEncoder()
    te_ary = te.fit(dataset).transform(dataset)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    # Apply Apriori to find frequent itemsets
    return apriori(df, min_support=min_support, use_colnames=True)

# Run the function
frequent_itemsets_toy = get_frequent_itemsets(toy_dataset, min_support=0.4)


In [7]:
# Load the dataset
df_market = pd.read_csv("/content/Market_Basket_Optimisation - Market_Basket_Optimisation.csv", header=None)

# Display basic information and first few rows
df_market.info(), df_market.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7501 entries, 0 to 7500
Data columns (total 20 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       7501 non-null   object
 1   1       5747 non-null   object
 2   2       4389 non-null   object
 3   3       3345 non-null   object
 4   4       2529 non-null   object
 5   5       1864 non-null   object
 6   6       1369 non-null   object
 7   7       981 non-null    object
 8   8       654 non-null    object
 9   9       395 non-null    object
 10  10      256 non-null    object
 11  11      154 non-null    object
 12  12      87 non-null     object
 13  13      47 non-null     object
 14  14      25 non-null     object
 15  15      8 non-null      object
 16  16      4 non-null      object
 17  17      4 non-null      object
 18  18      3 non-null      object
 19  19      1 non-null      object
dtypes: object(20)
memory usage: 1.1+ MB


(None,
               0          1           2                 3             4   \
 0         shrimp    almonds     avocado    vegetables mix  green grapes   
 1        burgers  meatballs        eggs               NaN           NaN   
 2        chutney        NaN         NaN               NaN           NaN   
 3         turkey    avocado         NaN               NaN           NaN   
 4  mineral water       milk  energy bar  whole wheat rice     green tea   
 
                  5     6               7             8             9   \
 0  whole weat flour  yams  cottage cheese  energy drink  tomato juice   
 1               NaN   NaN             NaN           NaN           NaN   
 2               NaN   NaN             NaN           NaN           NaN   
 3               NaN   NaN             NaN           NaN           NaN   
 4               NaN   NaN             NaN           NaN           NaN   
 
                10         11     12     13             14      15  \
 0  low fat yogurt 

In [8]:
import plotly.express as px

# Flatten the dataset to count item occurrences
all_items = df_market.values.flatten()
all_items = pd.Series(all_items).dropna()

In [9]:
# Count occurrences of each unique item
item_counts = all_items.value_counts().reset_index()
item_counts.columns = ["Item", "Count"]

In [10]:
# Plot the top 20 most frequently purchased items
fig = px.bar(item_counts.head(20), x="Item", y="Count", title="Top 20 Most Frequently Purchased Items")
fig.show()

In [11]:
# Display the top 20 most frequently purchased items as a DataFrame
item_counts.head(20)

Unnamed: 0,Item,Count
0,mineral water,1788
1,eggs,1348
2,spaghetti,1306
3,french fries,1282
4,chocolate,1230
5,green tea,991
6,milk,972
7,ground beef,737
8,frozen vegetables,715
9,pancakes,713


In [13]:
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Diapers', 'Beer', 'Eggs'],
    ['Milk', 'Diapers', 'Beer', 'Cola'],
    ['Bread', 'Milk', 'Diapers', 'Beer'],
    ['Bread', 'Milk', 'Diapers', 'Cola']
]


In [15]:
# Convert to list of lists
transactions = df_market.values.tolist()


In [18]:
print(type(transactions))
print(type(transactions[0]))
print(transactions[:5])

<class 'list'>
<class 'list'>
[['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil'], ['burgers', 'meatballs', 'eggs', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], ['chutney', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], ['turkey', 'avocado', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]]


In [21]:
import pandas as pd

# If transactions is a DataFrame, convert it to a list of lists
if isinstance(transactions, pd.DataFrame):
    transactions = transactions.values.tolist()

# If transactions is a Pandas Series, convert it
elif isinstance(transactions, pd.Series):
    transactions = transactions.tolist()


In [23]:
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd

# Ensure transactions is a list of lists of strings
transactions = [[str(item) for item in sublist] for sublist in transactions]

# Initialize and fit TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)

# Convert to DataFrame
df = pd.DataFrame(te_ary, columns=te.columns_)
df.head()

Unnamed: 0,almonds,antioxydant juice,asparagus,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,body spray,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,True,True,False,True,False,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,True,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


#**Business Plan for Supermarket Based on Frequent Itemset Analysis**
1. Cross-Selling Strategy
Bundle high-frequency items:
- Products like mineral water, eggs, spaghetti, and chocolate are frequently purchased.
- Offer discounted combo deals (e.g., "Buy Spaghetti + Olive Oil & Get a Discount").
- Meal-based bundling:
- Items like spaghetti, ground beef, tomatoes, and olive oil are common ingredients for pasta dishes.
- Create a "Pasta Night Special" bundle.
- Snack bundle promotions:
- Items like French fries, chocolate, and cookies are often purchased together.
- A “Movie Night Combo” (French fries + Chocolate + Soft drinks) can drive sales.
2. In-Store Layout Optimization
- Place frequent item pairs close to each other to increase impulse purchases.
- For example, if spaghetti and olive oil are often bought together, ensure they are displayed in the same aisle.
- A “Frequently Bought Together” section can boost sales.
3. Pricing Strategy
- Dynamic pricing on essential items:
- Since mineral water, eggs, and spaghetti are high-frequency items, offering slight discounts or loyalty rewards can encourage repeat purchases.
- Premium pricing for complementary items:
- If olive oil is commonly bought with spaghetti, it can be priced slightly higher while keeping spaghetti at a lower price to attract customers.
4. Seasonal Promotions & Campaigns
- Holiday meal kits:
- During festive seasons, offer pre-packaged meal kits (e.g., “Holiday Breakfast Pack” with eggs, milk, and pancakes).
- Health-focused promotions:
- Items like green tea, low-fat yogurt, and frozen smoothies can be part of a "Healthy Lifestyle" campaign.
5. Personalized Marketing & Loyalty Programs
- Targeted email/SMS offers:
- If a customer frequently buys pasta ingredients, send them personalized discounts on sauces and seasonings.
- Loyalty rewards:
- Customers who repeatedly buy chocolate and cookies could earn a free item after a certain number of purchases.

#**Conclusion**

By leveraging frequent itemset insights, the supermarket can optimize its pricing, promotions, store layout, and marketing to increase sales and customer satisfaction.







