In [3]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Load the Online Retail dataset and clean it in a single pipeline:
#   1. drop every row that has a missing value in any column,
#   2. keep only United Kingdom transactions (optional filter),
#   3. cast InvoiceNo to string so the prefix test below works,
#   4. discard cancelled transactions (InvoiceNo starting with 'C').
df = (
    pd.read_csv("OnlineRetail.csv", encoding='latin1')
    .dropna()
    .loc[lambda frame: frame['Country'] == 'United Kingdom']
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype(str))
    .loc[lambda frame: ~frame['InvoiceNo'].str.startswith('C')]
)

# Build the transaction-item matrix: one row per invoice, one column per
# item description, each cell holding the summed quantity (0 where the
# invoice does not contain the item).
basket = (
    df.groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .fillna(0)
)

# Convert quantities to binary (1 if bought, 0 if not)
def encode_units(x):
    """Return 0 when the quantity ``x`` is zero or negative, otherwise 1."""
    if x <= 0:
        return 0
    return 1

# Binarise the basket: True where the invoice contains the item (summed
# quantity > 0), False otherwise. For the NaN-free basket built above this is
# the same mapping encode_units performs, but done as one vectorised
# comparison instead of a Python call per cell. DataFrame.applymap is
# deprecated (it emits a FutureWarning on recent pandas), and mlxtend's
# apriori recommends boolean input over 0/1 integers.
basket_sets = basket > 0

# Apply the Apriori algorithm, keeping itemsets with support of at least 0.02
frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames=True)

# View the frequent itemsets, highest support first
print("Frequent Itemsets:\n", frequent_itemsets.sort_values(by="support", ascending=False))

# Generate association rules (optional), keeping rules with lift >= 1
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
print("\nSample Association Rules:\n", rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head())


  basket_sets = basket.applymap(encode_units)


Frequent Itemsets:
       support                                           itemsets
184  0.113160               (WHITE HANGING HEART T-LIGHT HOLDER)
79   0.086912                          (JUMBO BAG RED RETROSPOT)
149  0.084690                         (REGENCY CAKESTAND 3 TIER)
11   0.078083                    (ASSORTED COLOUR BIRD ORNAMENT)
123  0.077542                                    (PARTY BUNTING)
..        ...                                                ...
117  0.020121                     (PAINTED METAL PEARS ASSORTED)
221  0.020061  (LUNCH BAG SPACEBOY DESIGN , LUNCH BAG PINK PO...
42   0.020061                     (FELTCRAFT PRINCESS LOLA DOLL)
222  0.020001  (LUNCH BAG SUKI DESIGN , LUNCH BAG PINK POLKADOT)
215  0.020001  (LUNCH BAG APPLE DESIGN, LUNCH BAG RED RETROSPOT)

[235 rows x 2 columns]

Sample Association Rules:
                             antecedents                           consequents  \
0           (ALARM CLOCK BAKELIKE RED )          (ALARM CLOCK BAKEL